
rm(list = ls())

#### Install packages if needed 
# All libraries necessary
load.lib<-c("vtable", "ggplot2", "dplyr", "tidyr", "stargazer", "coefplot", "broom", "dotwhisker", 
            "ggpubr", "broom.helpers", "stringr", "lubridate", "margins", "xtable")

# select only the packages that aren't currently installed.

install.lib <- load.lib[!(load.lib %in% installed.packages())]

# install the missing packages, including their dependency.
for(lib in install.lib){
  install.packages(lib,dependencies=TRUE) 
} 


####packages needed for the analysis 

library(vtable)
library(dplyr)
library(ggplot2)
library(tidyr)
library(coefplot)
library(ggpubr)
library(dotwhisker)
library(stargazer)
library(broom)
library(broom.helpers)
library(stringr)
library(lubridate)
library(margins)
library(xtable)


###FOR REPLICATION, YOU NEED TO WRITE DOWN THE PATH TO THE DATA HERE!
path <- ".../"



##Reading in data 

dat<- readr::read_csv(
  paste0(path, "replication_dat_austria.csv"))




dat_it<- readr::read_csv(
  paste0(path, "replication_dat_it.csv"))





dat <- dat %>% 
  dplyr::rename(age=Q2, 
                gender = Q3, 
                edu = Q4, 
                urban_rural = Q5, 
                occupation_stat = Q6, 
                migrat_back = Q7, 
                degree_travel = Q8, 
                nat_id_alt = Q9_2, 
                eu_id_alt = Q9_3, 
                most_important_issue = Q10,
                party_vote = Q33, 
                import_country = Q15, 
                typ_country = Q16,
                express_country = Q17, 
                we_country = Q18, 
                import_eu = Q19, 
                typ_eu = Q20, 
                express_eu = Q21, 
                we_eu = Q22, 
                close_unempl = Q23_6,
                close_women = Q23_1, 
                close_men = Q23_2, 
                close_rural = Q23_5, 
                close_urban = Q23_7, 
                close_old = Q23_11,
                close_young = Q23_12, 
                close_uni = Q23_14, 
                close_no_uni = Q23_15, 
                sort_young = Q67_1, 
                sort_old = Q67_2, 
                sort_uni = Q67_3, 
                sort_no_uni = Q67_4, 
                sort_rural = Q67_5, 
                sort_urban = Q67_6, 
                sort_unemployed = Q67_7, 
                sort_women= Q67_8, 
                sort_men=Q67_9, 
                sort_id_young = Q67_1_num, 
                sort_id_old = Q67_2_num, 
                sort_id_uni = Q67_3_num, 
                sort_id_no_uni = Q67_4_num, 
                sort_id_rural = Q67_5_num, 
                sort_id_urban = Q67_6_num, 
                sort_id_unemployed = Q67_7_num, 
                sort_id_women= Q67_8_num, 
                sort_id_men=Q67_9_num)


dat_it <- dat_it %>% 
  dplyr::rename(age=Q2, 
                gender = Q3, 
                edu = Q4, 
                urban_rural = Q5, 
                occupation_stat = Q6, 
                migrat_back = Q7, 
                degree_travel = Q8, 
                nat_id_alt = Q9_2, 
                eu_id_alt = Q9_3, 
                most_important_issue = Q10,
                party_vote = Q33, 
                import_country = Q15, 
                typ_country = Q16,
                express_country = Q17, 
                we_country = Q18, 
                import_eu = Q19, 
                typ_eu = Q20, 
                express_eu = Q21, 
                we_eu = Q22, 
                close_unempl = Q23_6,
                close_women = Q23_1, 
                close_men = Q23_2, 
                close_rural = Q23_5, 
                close_urban = Q23_7, 
                close_old = Q23_11,
                close_young = Q23_12, 
                close_uni = Q23_14, 
                close_no_uni = Q23_15, 
                sort_young = Q69_1, 
                sort_old = Q69_2, 
                sort_uni = Q69_3, 
                sort_no_uni = Q69_4, 
                sort_rural = Q69_5, 
                sort_urban = Q69_6, 
                sort_unemployed = Q69_7, 
                sort_women= Q69_8, 
                sort_men=Q69_9, 
                sort_id_young = Q69_1_num, 
                sort_id_old = Q69_2_num, 
                sort_id_uni = Q69_3_num, 
                sort_id_no_uni = Q69_4_num, 
                sort_id_rural = Q69_5_num, 
                sort_id_urban = Q69_6_num, 
                sort_id_unemployed = Q69_7_num, 
                sort_id_women= Q69_8_num, 
                sort_id_men=Q69_9_num)

###Create props of group closeness


variables <- c("sort_young", "sort_old", "sort_uni", "sort_no_uni", "sort_rural", "sort_urban", "sort_unemployed", "sort_women", "sort_men")


group_props<- dat %>% 
  dplyr::select(Group, !!variables) %>% 
  pivot_longer(cols = variables) %>%
  group_by_at(1:3) %>% 
  summarise(count = n()) %>% 
  group_by_at(1:2) %>% 
  mutate(prop = count/sum(count), 
         upperE=qnorm(0.975)*sqrt(count/sum(count))*(1-(count/sum(count)))/count, 
         lowerE=-qnorm(0.975)*sqrt(count/sum(count))*(1-(count/sum(count)))/count) 


group_props <- group_props %>% 
  filter(value!="")


group_props_all_at <- group_props %>% 
  dplyr::group_by(name) %>% 
  mutate(count_all = sum(count)) %>% 
  group_by(name, value) %>% 
  mutate(count_sort = sum(count)) %>% 
  mutate(prop_all = count_sort/count_all) %>% 
  mutate(upperE_all=qnorm(0.975)*sqrt((prop_all*(1-prop_all))/count_all), 
         lowerE_all=-qnorm(0.975)*sqrt((prop_all*(1-prop_all))/count_all)) %>% 
  filter(Group=="Remain Control"&value!="Ausgeglichen")

labels <- c(`sort_young`="Young", 
            `sort_old`="Old",
            `sort_uni`="University degree", 
            `sort_no_uni`="No university degree", 
            `sort_rural`= "Rural", 
            `sort_urban`="Urban", 
            `sort_unemployed`="Unemployed", 
            `sort_women`="Women", 
            `sort_men`= "Men")
group_labs <- c("Group \nis split", "Group mainly \nLeave", "Group mainly \nRemain")
vote_labs <- c(`Don't Know Control`="Don't Know",
               `Leave Control` = "Voted Leave", 
               `Remain Control` = "Voted Remain")

####Figure 3 main paper Austria 


group_labs_2 <- c("Group mainly \nLeave", "Group mainly \nRemain")



ggplot(group_props_all_at, aes(fill=value,x=factor(name, levels = variables), y=prop_all))+
  geom_bar(position="dodge", stat="identity")+
  geom_errorbar(aes(ymin = prop_all + lowerE_all, ymax= prop_all + upperE_all), position=position_dodge(.9), width=.2)+
  labs(x= "", y= "Share of Respondents")+
  scale_fill_manual("Perceived \nsorting of \nsocial group", labels = group_labs_2, values=c("grey30", "grey70"))+
  ylim(0, 0.8)+
  scale_x_discrete(labels = labels)+
  coord_flip()+
  theme_minimal()


ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "vote_prot_at_2.png", 
       path = path)




####Sympathy towards different social groups 


variables_groups <- c("close_women", "close_men", "close_rural",  "close_urban", "close_unempl","close_old", "close_young", "close_uni", "close_no_uni")
soc_group <- c(`close_old`="Old", 
               `close_young`="Young", 
               `close_uni`="University degree", 
               `close_no_uni`= "No university degree", 
               `close_women`="Women", 
               `close_men`= "Men", 
               `close_rural`= "Rural", 
               `close_urban`="Urban",
               `close_unempl`="Unemployed")
               

group_soc <- dat %>% 
  dplyr::select(Group, !!variables_groups) %>% 
  pivot_longer(cols = variables_groups) %>%
  filter(value!="") %>% 
  group_by(Group, name) %>% 
  summarise(mean = mean(as.numeric(value)),
            sd = sd(as.numeric(value)), 
            n = n()) %>% 
  mutate(ci_upper = mean + (qnorm(0.975)*(sd/sqrt(n))),
         ci_lower = mean - (qnorm(0.975)*(sd/sqrt(n))))


###Figure 2 main paper Austria

ggplot(group_soc, aes(fill=Group ,x= factor(name, levels = variables_groups), y=mean))+
  geom_bar(position="dodge", stat="identity")+
  geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper), position=position_dodge(.9), width=.2)+
  scale_fill_manual(values=c("grey30", "grey50", "grey70"), labels = c("Don't Know", " Leave", "Remain"), name= "Respondent’s own \nvote choice")+
  scale_x_discrete(labels=soc_group)+
  labs(y="Closeness", x = "")+
  ylim(0,10)+
  coord_flip()+
  theme_minimal()

ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "soc_close_at.png", 
       path = path)






full_vote_mod <- glm(vote_remain~ close_all + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat, family = binomial(link="logit"))

full_vote_leave <- glm(vote_leave~close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat, family = binomial(link="logit"))






mod_eu_0 <- lm(european ~ close_all, dat=dat)
mod_eu_1 <- lm(european ~ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)
mod_eu_2 <- lm(european ~ close_all + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)
mod_eu_3 <- lm(european ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)


#### Table 2 main paper and Table 7 in appendix

stargazer(mod_eu_0, mod_eu_1, mod_eu_2, mod_eu_3,style = "apsr", type="latex", out = paste0(path, "eu_main_models_at.tex"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          keep = c("close_all", "close_young", "close_old", "close_uni", "close_no_uni", "close_rural", "close_urban", "close_unempl", "close_women", "close_men"), 
          covariate.labels = c("Group closeness score", "Young", "Old", "University", "No University", "Rural", "Urban", "Unemployed", "Women", "Men"))


stargazer( mod_eu_1, mod_eu_2, mod_eu_3,style = "apsr", type="latex", out = paste0(path, "eu_main_models_at_long.tex"),
          column.labels = c( "Model 2", "Model 3", "Model 4"))

mod_nat_1 <- lm(national ~  as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)

mod_nat_2 <- lm(national ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)

mod_nat_3 <- lm(national ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat)

mod_nat_0 <- lm(national ~ close_all, dat=dat)


####Second half of table 2

stargazer(mod_nat_0, mod_nat_1, mod_nat_2, mod_nat_3, style = "apsr",type="latex", out = paste0(path, "nat_main_models_at.tex"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          keep = c("close_all", "close_young", "close_old", "close_uni", "close_no_uni", "close_rural", "close_urban", "close_unempl", "close_women", "close_men"),
          covariate.labels = c("Group closeness score", "Young", "Old", "University", "No University", "Rural", "Urban", "Unemployed", "Women", "Men"))


stargazer(mod_nat_1, mod_nat_2, mod_nat_3, style = "apsr",type="latex", out = paste0(path, "nat_main_models_at_long.tex"),
          column.labels = c("Model 2", "Model 3", "Model 4"))





###Create props of group closeness


group_props_it<- dat_it %>% 
  dplyr::select(Group, !!variables) %>% 
  pivot_longer(cols = variables) %>%
  group_by_at(1:3) %>% 
  summarise(count = n()) %>% 
  group_by_at(1:2) %>% 
  mutate(prop = count/sum(count), 
         upperE=1.96*sqrt(count/sum(count))*(1-(count/sum(count)))/count, 
         lowerE=-1.96*sqrt(count/sum(count))*(1-(count/sum(count)))/count)

group_props_it <- group_props_it %>% 
  filter(value!="")




group_props_it$value <- factor(group_props_it$value, levels = c("Voterebbe equamente", "Prevalentemente a favore dell' uscita dall'UE", "Prevalentemente a favore della permanenza nell'UE"))

group_props_all <- group_props_it %>% 
  dplyr::group_by(name) %>% 
  mutate(count_all = sum(count)) %>% 
  group_by(name, value) %>% 
  mutate(count_sort = sum(count)) %>% 
  mutate(prop_all = count_sort/count_all) %>% 
  mutate(upperE_all=qnorm(0.975)*sqrt((prop_all*(1-prop_all))/count_all), 
         lowerE_all=-qnorm(0.975)*sqrt((prop_all*(1-prop_all))/count_all)) %>% 
  filter(Group=="Remain Control"&value!= "Voterebbe equamente")



labels <- c("Young", "Old", "University degree", "No university degree", "Rural", "Urban", "Unemployed", "Women", "Men")

vote_labs <- c(`Don't Know Control`="Don't Know",
               `Leave Control` = "Voted Leave", 
               `Remain Control` = "Voted Remain")

####Graph of how different voter groups locate social groups to referenda positions (Figure 3 main paper Italy)


ggplot(group_props_all, aes(fill=value, x=factor(name, levels = variables), y=prop_all))+
  geom_bar(position="dodge", stat="identity")+
  geom_errorbar(aes(ymin = prop_all + lowerE_all, ymax= prop_all + upperE_all), position=position_dodge(.9), width=.2)+
  scale_x_discrete(labels = labels)+
  labs(y= "Share of Respondents", x = "")+
  scale_fill_manual("Perceived \nsorting of \nsocial group", labels = c("Group mainly \nLeave", "Group mainly \nRemain"), values=c("grey30", "grey70"))+
  ylim(0.0,0.8)+
  coord_flip()+
  theme_minimal()

ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "vote_prot_it_2.png", 
       path = path)


####Sympathy towards different social groups 



group_soc_it <- dat_it %>% 
  dplyr::select(Group, !!variables_groups) %>% 
  pivot_longer(cols = variables_groups) %>%
  filter(value!="") %>% 
  group_by(Group, name) %>% 
  summarise(mean = mean(as.numeric(value)),
            sd = sd(as.numeric(value)), 
            n = n()) %>% 
  mutate(ci_upper = mean + (1.96 *(sd/sqrt(n))),
         ci_lower = mean - (1.96*(sd)/sqrt(n)))


###Graphic about group closeness of different voter groups to social groups (Figure 2 main paper Italy)

ggplot(group_soc_it, aes(fill=Group ,x=factor(name, levels=variables_groups), y=mean))+
  geom_bar(position="dodge", stat="identity")+
  geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper), position=position_dodge(.9), width=.2)+
  scale_fill_manual("Respondent’s own \nvote choice" , labels = c("Don't Know", "Leave", "Remain"), values=c("grey30", "grey50", "grey70")
  )+
  scale_x_discrete(labels=soc_group)+
  labs(y="Closeness", x = "")+
  ylim(0,10)+
  coord_flip()+
  theme_minimal()

ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "soc_close_it.png", 
       path = path)




full_vote_mod_it <- glm(vote_remain~close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it, family = binomial(link="logit"))

full_vote_leave_it <- glm(vote_leave~close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it, family = binomial(link="logit"))



mod_eu_0_it <- lm(european ~ close_all, dat=dat_it)
mod_eu_1_it <- lm(european ~ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)
mod_eu_2_it <- lm(european ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)
mod_eu_3_it <- lm(european ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)


mod_nat_0_it <- lm(national ~ close_all, dat=dat_it)
mod_nat_1_it <- lm(national ~  as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)
mod_nat_2_it <- lm(national ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)
mod_nat_3_it <- lm(national ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)


#### Table 3 main paper and Table 6 in appendix

###Short 

stargazer(mod_eu_0_it, mod_eu_1_it, mod_eu_2_it, mod_eu_3_it,style = "apsr", type="latex", out = paste0(path, "eu_main_models_it.tex"),
          column.labels = c("Model 1", "Model 2", "Model 3"),
          keep = c("close_all", "close_young", "close_old", "close_uni", "close_no_uni", "close_rural", "close_urban", "close_unempl", "close_women", "close_men"), 
          covariate.labels = c("Group closeness score", "Young", "Old", "University", "No University", "Rural", "Urban", "Unemployed", "Women", "Men"))

###Long

stargazer(mod_eu_1_it, mod_eu_2_it, mod_eu_3_it,style = "apsr", type="latex", out = paste0(path, "eu_main_models_it_long.tex"),
          column.labels = c("Model 2", "Model 3", "Model 4"))

###Short

stargazer(mod_nat_0_it, mod_nat_1_it, mod_nat_2_it, mod_nat_3_it, style = "apsr", type="latex", out = paste0(path, "nat_main_models_it.tex"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          keep = c("close_all", "close_young", "close_old", "close_uni", "close_no_uni", "close_rural", "close_urban", "close_unempl", "close_women", "close_men"), 
          covariate.labels = c("Group closeness score", "Young", "Old", "University", "No University", "Rural", "Urban", "Unemployed", "Women", "Men"))

###Long
stargazer(mod_nat_1_it, mod_nat_2_it, mod_nat_3_it, style = "apsr", type="latex", out = paste0(path, "nat_main_models_it_long.tex"),
          column.labels = c("Model 2", "Model 3", "Model 4"))
    




#### For id plot with individual closeness 

mod_eu_4_it <- lm(european ~ sort_id_young +sort_id_old+ sort_id_uni + sort_id_no_uni + sort_id_rural + sort_id_urban + sort_id_unemployed + sort_id_women + sort_id_men
                  + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back
                  , dat=dat_it)





mod_it_ind <- broom::tidy(mod_eu_4_it) %>% 
  filter(term=="Q69_1_num"|term=="sort_id_old"|term=="sort_id_uni"|term=="sort_id_no_uni"|term=="sort_id_rural"|term=="sort_id_urban"|term=="sort_id_unemployed"|term=="sort_id_women"|term=="sort_id_men")

#### Figure 5 appendix Italy


sort_it <- dwplot(mod_it_ind,
                  vline = geom_vline(xintercept=0, colour="grey60", linetype=2)) %>% 
  relabel_predictors(c(
    "sort_id_young" = "Old",
    "sort_id_old" = "Young", 
    "sort_id_uni" = "University", 
    "sort_id_no_uni" = "No university", 
    "sort_id_rural" = "Rural",
    "sort_id_urban" = "Urban",
    "sort_id_unemployed" = "Unemployed",
    "sort_id_women" = "Women", 
    "sort_id_men" = "Men"
  )) +
  scale_color_grey(name="Italy", labels = "Coefficients")+
  theme_minimal()

ggsave(sort_it, 
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "sort_ind_it.png", 
       path = path)


### Austria




mod_eu_4_at <- lm(european ~ sort_id_young +sort_id_old+ sort_id_uni + sort_id_no_uni + sort_id_rural + sort_id_urban + sort_id_unemployed + sort_id_women + sort_id_men
                  + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back
                  , dat=dat)



mod_at_ind <- broom::tidy(mod_eu_4_at) %>% 
  filter(term=="sort_id_young"|term=="sort_id_old"|term=="sort_id_uni"|term=="sort_id_no_uni"|term=="sort_id_rural"|term=="sort_id_urban"|term=="sort_id_unemployed"|term=="sort_id_women"|term=="sort_id_men")


#### Figure 5 appendix Austria

sort_at <- dwplot(mod_at_ind,
                  vline = geom_vline(xintercept=0, colour="grey60", linetype=2)) %>% 
  relabel_predictors(c(
    "sort_id_young" = "Old",
    "sort_id_old" = "Young", 
    "sort_id_uni" = "University", 
    "sort_id_no_uni" = "No university", 
    "sort_id_rural" = "Rural",
    "sort_id_urban" = "Urban",
    "sort_id_unemployed" = "Unemployed",
    "sort_id_women" = "Women", 
    "sort_id_men" = "Men"
  )) + 
  scale_color_grey(name="Austria", labels = "Coefficients")+
  theme_minimal()




ggsave(sort_at, 
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "sort_ind_at.png", 
       path = path)



mod_nat_0_it <- lm(national ~  close_all, dat=dat_it)

mod_nat_1_it <- lm(national ~  as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)

mod_nat_2_it <- lm(national ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)

mod_nat_3_it <- lm(national ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote, dat=dat_it)


###### Table 4 in main paper 

stargazer(full_vote_mod, full_vote_mod_it, full_vote_leave, full_vote_leave_it, style = "apsr", type="latex", out=paste0(path, "vote_models_at_it.tex"),
          column.labels = c("Vote Remain (AT)", "Vote Remain (IT)", "Vote Leave (AT)", "Vote Leave (IT)"),
          keep = "close_all", 
          covariate.labels = "Group Closeness Score")





#### Creating marginal effects plot 

cdat_1<- cplot(full_vote_leave, "close_all", draw = FALSE)

cdat_2<- cplot(full_vote_leave_it, "close_all", draw = FALSE)


cdat_3<- cplot(full_vote_mod, "close_all", draw = FALSE)

cdat_4<- cplot(full_vote_mod_it, "close_all", draw = FALSE)

cdat_1$country <- "Austria"

cdat_2$country <- "Italy"

cdat_3$country <- "Austria"

cdat_4$country <- "Italy"

cdat <- rbind(cdat_1, cdat_2) 
cdat_rem <- rbind(cdat_3, cdat_4) 

leave_plot <- ggplot(cdat, aes(Group = country, fill = country)) + 
  geom_line(aes(x = xvals, y = yvals)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, x= xvals), linetype = 2, alpha = 0.2) +
  geom_hline(yintercept = 0) +
  ylim(-0.01,1)+
  xlab("Group closeness score") + 
  ylab("Predicted probablity to vote leave")+
  scale_fill_manual(name = "Country", values =c("#3CBB75FF", "#481567FF"))+
  theme_minimal()


rem_plot<- ggplot(cdat_rem, aes(Group = country, fill = country)) + 
  geom_line(aes(x = xvals, y = yvals)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, x= xvals), linetype = 2, alpha = 0.2) +
  geom_hline(yintercept = 0) +
  ylim(-0.01,1)+
  xlab("Group closeness score") + 
  ylab("Predicted probablity to vote remain")+
  scale_fill_manual(name = "Country", values =c("#3CBB75FF", "#481567FF"))+
  theme_minimal()


### Figure 5 in main paper 

ggarrange(rem_plot, leave_plot, 
          common.legend = TRUE, legend = "bottom")

ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "pred_vote.png", 
       path =path)





#### Histogram of group closeness score 

hist_it_close <- ggplot()+
  geom_density(data=dat_it, aes(x=close_all))+
  labs(x="GCS (IT)", y="Density")+
  theme_minimal()

hist_at_close <- ggplot()+
  geom_density(data=dat, aes(x=close_all))+
  labs(x="GCS (AT)", y="Density")+
  theme_minimal()

#### Figure 3 in Appendix 

ggarrange(hist_it_close, hist_at_close)


ggsave(width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "density_gcs.png", 
       path =path)



### Other margins plot on EU ID

eu_prob_at <- cplot(mod_eu_2, "close_all", draw = FALSE)
eu_prob_it <- cplot(mod_eu_2_it, "close_all", draw = FALSE)

eu_prob_at$country <- "Austria"
eu_prob_it$country <- "Italy"

eu_prob <- rbind(eu_prob_at, eu_prob_it)

#### Figure 4 main paper 

eu_plot<- ggplot(eu_prob, aes(Group = country, fill = country)) + 
  geom_line(aes(x = xvals, y = yvals)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, x= xvals), linetype = 2, alpha = 0.3) +
  geom_hline(yintercept = 0) +
  ylim(-0.01,1)+
  xlab("Group closeness score") + 
  ylab("Predicted European identity")+
  scale_fill_manual(name = "Country", values = c("#3CBB75FF", "#481567FF"))+
  theme_minimal()




ggsave(eu_plot,
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "eu_pred_prob.png", 
       path =path)




### SUmmary statistics 


dat_it$survey_length<- as.numeric(as.POSIXlt(dat_it$EndDate) - as.POSIXlt(dat_it$StartDate))


dat_it$age <- as.numeric(dat_it$age)
dat_it$party_vote <- as.factor(dat_it$party_vote)
dat_it$occupation_stat <- as.factor(dat_it$occupation_stat)

dat$age <- as.numeric(dat$age)
dat$party_vote <- as.factor(dat$party_vote)
dat$occupation_stat <- as.factor(dat$occupation_stat)


tmp_it <- dat_it %>% 
  dplyr::select(c("Group", "age", "gender", "edu", "urban_rural", "occupation_stat", "migrat_back", "degree_travel", "nat_id_alt", "eu_id_alt", "party_vote", "national", "european"))


tmp_at <- dat %>% 
  dplyr::select(c("Group", "age", "gender", "edu", "urban_rural", "occupation_stat", "migrat_back", "degree_travel","nat_id_alt", "eu_id_alt", "party_vote", "national", "european"))



### Tables 1 and 2 appendix 
st(tmp_at, out = "latex")
st(tmp_it, out = "latex")


#### Correlation matrixes


cor.test(dat$european, dat$eu_id_alt)
cor.test(dat$national, dat$nat_id_alt )

cor.test(dat_it$european, dat_it$eu_id_alt)
cor.test(dat_it$national, dat_it$nat_id_alt )



dat_id_at <- dat %>% 
  dplyr::select(c("nat_id_alt", "eu_id_alt", "european", "national"))

dat_id_it <- dat_it %>% 
  dplyr::select(c("nat_id_alt", "eu_id_alt", "european", "national"))


dat_id_it <- drop_na(dat_id_it)


mat_at <- cor(dat_id_at)
mat_it <- cor(dat_id_it)

mat_at[upper.tri(mat_at)] <- ""
mat_at <- as.data.frame(mat_at)

### Table 3 appendix 
print(xtable(mat_at), type="latex")

mat_it[upper.tri(mat_it)] <- ""
mat_it <- as.data.frame(mat_it)


### Table 4 appendix 
print(xtable(mat_it), type="latex")


#### Most important issue


dat <- tidyr::separate(dat,
                           col=most_important_issue,
                           into = c("issue_1", "issue_2"),
                           sep = ",")

is_1 <- dat$issue_1
is_2 <- dat$issue_2

issue_import <- rbind(is_1, is_2)

tmp <- as.data.frame(prop.table(table(issue_import)))


tmp$issue_import<- recode_factor(tmp$issue_import, 
                                 "Andere" = "Other",
                                 "Arbeitslosigkeit" = "Unemployment", 
                                 "Besteuerung" = "Taxation", 
                                 "Bildungs- und Ausbildungssystem" = "Education", 
                                 "Diskriminierung von AusländerInnen" = "Discrimination", 
                                 "Einwanderung" = "Immigration", 
                                 "Frauenrechte" = "Womens Rights", 
                                 "Gesundheit" = "Health", 
                                 "Keine" = "Nothing", 
                                 "Kriminalität" = "Crime", 
                                 "Pensionen" = "Pensions", 
                                 "Steigende Preise / Inflation / Lebenshaltungskosten" = "Inflation",
                                 "Terrorismus" = "Terrorism",
                                 "Umwelt und Klimawandel" = "Environment",
                                 "Weiß nicht" = "Don`t know",
                                 "Wohnbau/Wohnungsbeschaffung" = "Housing", 
                                 "Wirtschaftliche Situation" = "Economic situation")



tmp <- tmp[order(tmp$Freq, decreasing = TRUE),]

### Figure 4 appendix Austria

imp_issue_at <- ggplot(tmp, aes(x = reorder(issue_import, Freq), y=Freq))+
  geom_bar(position="dodge", stat="identity")+
  labs(y="Percent", x = "Most important issue")+
  coord_flip()+
  theme_minimal()

ggsave(imp_issue_at,
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "imp_iss_at.png", 
       path = path)



dat_it <- tidyr::separate(dat_it,
                              col=most_important_issue,
                              into = c("issue_1", "issue_2"),
                              sep = ",")

is_1_it <- dat_it$issue_1
is_2_it <- dat_it$issue_2

tmp_ita <- rbind(is_1_it, is_2_it)

tmp_ita <- as.data.frame(prop.table(table(tmp_ita)))

tmp_ita$issue_import<- recode_factor(tmp_ita$tmp_ita, 
                                     "Altro" = "Other",
                                     "La disoccupazione" = "Unemployment", 
                                     "La tassazione" = "Taxation", 
                                     "Il sistema scolastico" = "Education", 
                                     "La discriminazione degli stranieri" = "Discrimination", 
                                     "L’immigrazione" = "Immigration", 
                                     "I diritti delle donne" = "Womens Rights", 
                                     "La salute" = "Health", 
                                     "Niente" = "Nothing", 
                                     "La criminalità" = "Crime", 
                                     "La pensione" = "Pensions", 
                                     "L’aumento dei prezzi/ il costo della vita/ inflazione" = "Inflation",
                                     "Il terrorismo" = "Terrorism",
                                     "L'ambiente e il cambiamento climatico" = "Environment",
                                     "Non so" = "Don`t know",
                                     "Il settore immobiliare" = "Housing", 
                                     "La situazione economica" = "Economic situation", 
                                     "L’Unione Europea" = "EU")

### Figure 4 appendix Italy

imp_iss_ita <- ggplot(tmp_ita, aes(x = reorder(issue_import, Freq), y=Freq))+
  geom_bar(position="dodge", stat="identity")+
  labs(y="Percent", x = "Most important issue")+
  coord_flip()+
  theme_minimal()

ggsave(imp_iss_ita,
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "imp_iss_ita.png", 
       path = path)


####Boxplots - national identity - european identity #######



plot_box_nat_at<-dat %>% 
  ggplot( aes(x=as.factor(Group), y=national))+
  geom_violin()+
  stat_summary(fun = "mean",
               geom = "point",
               color = "red")+
  scale_x_discrete(labels=c("Non-Voters", "Leave", "Remain"))+
  labs(x="Vote Choice (AT)", y="National identity")+
  theme_minimal()+
  theme(legend.key = element_rect(colour = "transparent", fill = "white"))



plot_box_eu_at <-dat %>% 
  ggplot( aes(x=as.factor(Group), y=european))+
  geom_violin()+
  stat_summary(fun = "mean",
               geom = "point",
               color = "red")+
  scale_x_discrete(labels=c("Non-Voters", "Leave", "Remain"))+
  labs(x="Vote Choice (AT)", y="European identity")+
  theme_minimal()+
  theme(legend.key = element_rect(colour = "transparent", fill = "white"))



plot_box_nat_it<-dat_it %>% 
  ggplot( aes(x=as.factor(Group), y=national))+
  geom_violin()+
  stat_summary(fun = "mean",
               geom = "point",
               color = "red")+
  scale_x_discrete(labels=c("Non-Voters", "Leave", "Remain"))+
  labs(x="Vote Choice (IT)", y="National identity")+
  theme_minimal()+
  theme(legend.key = element_rect(colour = "transparent", fill = "white"))



plot_box_eu_it <-dat_it %>% 
  ggplot( aes(x=as.factor(Group), y=european))+
  geom_violin()+
  stat_summary(fun = "mean",
               geom = "point",
               color = "red")+
  scale_x_discrete(labels=c("Non-Voters", "Leave", "Remain"))+
  labs(x="Vote Choice (IT)", y="European identity")+
  theme_minimal()+
  theme(legend.key = element_rect(colour = "transparent", fill = "white"))

### Figure 2 in appendix 



box_plots<- ggarrange(plot_box_eu_it, plot_box_eu_at,
                      plot_box_nat_it, plot_box_nat_at)


ggsave(box_plots,
       width = 16,
       height = 10,
       units = "cm", 
       dpi = 300,
       device="png",
       filename = "box_plots_ids.png", 
       path = path)



########How objective group membership relates to subjective group membership##########

x <- c()

######Young 
x[1]<- mean(as.numeric(dat$close_young[dat$age > median(dat$age)], na.rm=TRUE))
x[2]<- mean(as.numeric(dat$close_young[dat$age <= median(dat$age)], na.rm=TRUE))

######Old#####
x[3]<- mean(as.numeric(dat$close_old[dat$age > median(dat$age)], na.rm=TRUE))
x[4]<- mean(as.numeric(dat$close_old[dat$age <= median(dat$age)], na.rm=TRUE))

######University degree #######
x[5]<- mean(as.numeric(dat$close_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
x[6]<- mean(as.numeric(dat$close_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######No University degree #######
x[7]<- mean(as.numeric(dat$close_no_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
x[8]<- mean(as.numeric(dat$close_no_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######Women #######
x[9]<- mean(as.numeric(dat$close_women[dat$gender == "Weiblich"], na.rm=TRUE))
x[10]<- mean(as.numeric(dat$close_women[dat$gender == "Männlich"], na.rm=TRUE))


######Men #######
x[11]<- mean(as.numeric(dat$close_men[dat$gender == "Weiblich"], na.rm=TRUE))
x[12]<- mean(as.numeric(dat$close_men[dat$gender == "Männlich"], na.rm=TRUE))


########Urban ########

x[13]<- mean(as.numeric(dat$close_urban[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
x[14]<- mean(as.numeric(dat$close_urban[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))

########Rural ########

x[15]<- mean(as.numeric(dat$close_rural[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
x[16]<- mean(as.numeric(dat$close_rural[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))


#####Unemployed 

x[17]<- mean(as.numeric(dat$close_unempl[!str_detect(dat$occupation_stat, "arbeitslos")]), na.rm = TRUE)
x[18]<- mean(as.numeric(dat$close_unempl[str_detect(dat$occupation_stat, "arbeitslos")]), na.rm = TRUE)


###SD 
s <- c()

s[1]<- sd(as.numeric(dat$close_young[dat$age > median(dat$age)], na.rm=TRUE))
s[2]<- sd(as.numeric(dat$close_young[dat$age <= median(dat$age)], na.rm=TRUE))

######Old#####
s[3]<- sd(as.numeric(dat$close_old[dat$age > median(dat$age)], na.rm=TRUE))
s[4]<- sd(as.numeric(dat$close_old[dat$age <= median(dat$age)], na.rm=TRUE))

######University degree #######
s[5]<- sd(as.numeric(dat$close_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
s[6]<- sd(as.numeric(dat$close_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######No University degree #######
s[7]<- sd(as.numeric(dat$close_no_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
s[8]<- sd(as.numeric(dat$close_no_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######Women #######
s[9]<- sd(as.numeric(dat$close_women[dat$gender == "Weiblich"], na.rm=TRUE))
s[10]<- sd(as.numeric(dat$close_women[dat$gender == "Männlich"], na.rm=TRUE))


######Men #######
s[11]<- sd(as.numeric(dat$close_men[dat$gender == "Weiblich"], na.rm=TRUE))
s[12]<- sd(as.numeric(dat$close_men[dat$gender == "Männlich"], na.rm=TRUE))


########Urban ########

s[13]<- sd(as.numeric(dat$close_urban[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
s[14]<- sd(as.numeric(dat$close_urban[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))

########Rural ########

s[15]<- sd(as.numeric(dat$close_rural[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
s[16]<- sd(as.numeric(dat$close_rural[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))


#####Unemployed 

s[17]<- sd(as.numeric(dat$close_unempl[!str_detect(dat$occupation_stat, "arbeitslos")]), na.rm = TRUE)
s[18]<- sd(as.numeric(dat$close_unempl[str_detect(dat$occupation_stat, "arbeitslos")]), na.rm = TRUE)


#########length##########################################

n <- c()

n[1]<- length(as.numeric(dat$close_young[dat$age > median(dat$age)], na.rm=TRUE))
n[2]<- length(as.numeric(dat$close_young[dat$age <= median(dat$age)], na.rm=TRUE))

######Old#####
n[3]<- length(as.numeric(dat$close_old[dat$age > median(dat$age)], na.rm=TRUE))
n[4]<- length(as.numeric(dat$close_old[dat$age <= median(dat$age)], na.rm=TRUE))

######University degree #######
n[5]<- length(as.numeric(dat$close_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
n[6]<- length(as.numeric(dat$close_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######No University degree #######
n[7]<- length(as.numeric(dat$close_no_uni[dat$edu == "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))
n[8]<- length(as.numeric(dat$close_no_uni[dat$edu != "Abschluss mit Doktorat (aufbauend auf Diplomstudienabschluss: Dr., PhD)"], na.rm=TRUE))


######Women #######
n[9]<- length(as.numeric(dat$close_women[dat$gender == "Weiblich"], na.rm=TRUE))
n[10]<- length(as.numeric(dat$close_women[dat$gender == "Männlich"], na.rm=TRUE))


######Men #######
n[11]<- length(as.numeric(dat$close_men[dat$gender == "Weiblich"], na.rm=TRUE))
n[12]<- length(as.numeric(dat$close_men[dat$gender == "Männlich"], na.rm=TRUE))


########Urban ########

n[13]<- length(as.numeric(dat$close_urban[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
n[14]<- length(as.numeric(dat$close_urban[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))

########Rural ########

n[15]<- length(as.numeric(dat$close_rural[dat$urban_rural  == "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))
n[16]<- length(as.numeric(dat$close_rural[dat$urban_rural  != "einer mittelgrossen Stadt bis Grossstadt (ab 10.000 Einwohner)"], na.rm=TRUE))


#####Unemployed 

n[17]<- length(as.numeric(dat$close_unempl[!(stringr::str_detect(dat$occupation_stat, "arbeitslos"))]))
n[18]<- length(as.numeric(dat$close_unempl[stringr::str_detect(dat$occupation_stat, "arbeitslos")]))


y <- c("Old", "Young", "Old", "Young", "University degree", "No university degree",  "University degree", "No university degree", "Women", "Men", "Women", "Men", "Urban", "Rural", "Urban", "Rural", "Not unemployed", "Unemployed")

z <- c("Young", "Young", "Old", "Old", "University degree", "University degree", "No university degree", "No university degree", "Women", "Women", "Men", "Men", "Urban", "Urban", "Rural", "Rural", "Unemployed", "Unemployed")                



means_dat <- as.data.frame(cbind(x,y,z, s, n))

names(means_dat) <- c("means", "objective_group", "subjective_group", "sd_g", "n_g")

means_dat$upper <- as.numeric(means_dat$means)+qt(1 - (0.05 / 2), df=(as.numeric(means_dat$n_g)- 1))*(as.numeric(means_dat$sd_g)/sqrt(as.numeric(means_dat$n_g)))

means_dat$lower <- as.numeric(means_dat$means)-qt(1 - (0.05 / 2), df=(as.numeric(means_dat$n_g)- 1))*(as.numeric(means_dat$sd_g)/sqrt(as.numeric(means_dat$n_g)))

means_dat$subjective_group <- factor(means_dat$subjective_group, levels = unique(means_dat$subjective_group))

means_dat$in_out <- ifelse(means_dat$objective_group==means_dat$subjective_group, "In-group", "Out-group")

### Figure 1 in main paper (Austria)

means_dat %>% 
  ggplot(., aes(fill=in_out, x=subjective_group, y=as.numeric(means), shape=in_out))+
  geom_point(size=2)+
  geom_pointrange(aes(ymin=lower, ymax=upper, fill=in_out, shape=in_out))+
  scale_y_continuous()+
  ylim(0,10)+
  coord_flip()+
  theme_minimal()+
  theme(legend.position="bottom")+
  labs(x="", y="Subj. Group Identity", fill = "Objective group\n membership", shape = "Objective group\n membership")


ggsave(
  width = 16,
  height = 10,
  units = "cm", 
  dpi = 300,
  device="png",
  filename = "obj_subj_at.png", 
  path = path)





x_it <- c()


dat_it$age <- as.numeric(dat_it$age)
######Young 
x_it[1]<- mean(as.numeric(dat_it$close_young[dat_it$age > median(dat_it$age, na.rm = TRUE)]), na.rm=T)
x_it[2]<- mean(as.numeric(dat_it$close_young[dat_it$age <= median(dat_it$age, na.rm = TRUE)]), na.rm=TRUE)


######Old#####
x_it[3]<- mean(as.numeric(dat_it$close_old[dat_it$age > median(dat_it$age, na.rm = TRUE)]), na.rm=TRUE)
x_it[4]<- mean(as.numeric(dat_it$close_old[dat_it$age <= median(dat_it$age, na.rm = T)]), na.rm=TRUE)

######University degree #######
x_it[5]<- mean(as.numeric(dat_it$close_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)
x_it[6]<- mean(as.numeric(dat_it$close_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)


######No University degree #######
x_it[7]<- mean(as.numeric(dat_it$close_no_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)
x_it[8]<- mean(as.numeric(dat_it$close_no_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)


######Women #######
x_it[9]<- mean(as.numeric(dat_it$close_women[dat_it$gender == "Femmina"]), na.rm=TRUE)
x_it[10]<- mean(as.numeric(dat_it$close_women[dat_it$gender == "Maschio"]), na.rm=TRUE)


######Men #######
x_it[11]<- mean(as.numeric(dat_it$close_men[dat_it$gender == "Femmina"]), na.rm=TRUE)
x_it[12]<- mean(as.numeric(dat_it$close_men[dat_it$gender == "Maschio"]), na.rm=TRUE)


########Urban ########

x_it[13]<- mean(as.numeric(dat_it$close_urban[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)
x_it[14]<- mean(as.numeric(dat_it$close_urban[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)

########Rural ########

x_it[15]<- mean(as.numeric(dat_it$close_rural[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)
x_it[16]<- mean(as.numeric(dat_it$close_rural[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)


#####Unemployed 

x_it[17]<- mean(as.numeric(dat_it$close_unempl[!str_detect(dat_it$occupation_stat, "Disoccupato/a")]), na.rm = TRUE)
x_it[18]<- mean(as.numeric(dat_it$close_unempl[str_detect(dat_it$occupation_stat, "Disoccupato/a")]), na.rm = TRUE)

s_it <- c()

######Young 
s_it[1]<- sd(as.numeric(dat_it$close_young[dat_it$age > median(dat_it$age, na.rm = TRUE)]), na.rm=T)
s_it[2]<- sd(as.numeric(dat_it$close_young[dat_it$age <= median(dat_it$age, na.rm = TRUE)]), na.rm=TRUE)


######Old#####
s_it[3]<- sd(as.numeric(dat_it$close_old[dat_it$age > median(dat_it$age, na.rm = TRUE)]), na.rm=TRUE)
s_it[4]<- sd(as.numeric(dat_it$close_old[dat_it$age <= median(dat_it$age, na.rm = T)]), na.rm=TRUE)

######University degree #######
s_it[5]<- sd(as.numeric(dat_it$close_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)
s_it[6]<- sd(as.numeric(dat_it$close_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)


######No University degree #######
s_it[7]<- sd(as.numeric(dat_it$close_no_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)
s_it[8]<- sd(as.numeric(dat_it$close_no_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]), na.rm=TRUE)


######Women #######
s_it[9]<- sd(as.numeric(dat_it$close_women[dat_it$gender == "Femmina"]), na.rm=TRUE)
s_it[10]<- sd(as.numeric(dat_it$close_women[dat_it$gender == "Maschio"]), na.rm=TRUE)


######Men #######
s_it[11]<- sd(as.numeric(dat_it$close_men[dat_it$gender == "Femmina"]), na.rm=TRUE)
s_it[12]<- sd(as.numeric(dat_it$close_men[dat_it$gender == "Maschio"]), na.rm=TRUE)


########Urban ########

s_it[13]<- sd(as.numeric(dat_it$close_urban[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)
s_it[14]<- sd(as.numeric(dat_it$close_urban[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)

########Rural ########

s_it[15]<- sd(as.numeric(dat_it$close_rural[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)
s_it[16]<- sd(as.numeric(dat_it$close_rural[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]), na.rm=TRUE)


#####Unemployed 

s_it[17]<- sd(as.numeric(dat_it$close_unempl[!str_detect(dat_it$occupation_stat, "Disoccupato/a")]), na.rm = TRUE)
s_it[18]<- sd(as.numeric(dat_it$close_unempl[str_detect(dat_it$occupation_stat, "Disoccupato/a")]), na.rm = TRUE)


n_it <- c()

######Young 
n_it[1]<- length(as.numeric(dat_it$close_young[dat_it$age > median(dat_it$age, na.rm = TRUE)]))
n_it[2]<- length(as.numeric(dat_it$close_young[dat_it$age <= median(dat_it$age, na.rm = TRUE)]))


######Old#####
n_it[3]<- length(as.numeric(dat_it$close_old[dat_it$age > median(dat_it$age, na.rm = TRUE)]))
n_it[4]<- length(as.numeric(dat_it$close_old[dat_it$age <= median(dat_it$age, na.rm = T)]))

######University degree #######
n_it[5]<- length(as.numeric(dat_it$close_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]))
n_it[6]<- length(as.numeric(dat_it$close_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]))


######No University degree #######
n_it[7]<- length(as.numeric(dat_it$close_no_uni[dat_it$edu == "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]))
n_it[8]<- length(as.numeric(dat_it$close_no_uni[dat_it$edu != "Dottorato di ricerca / Diploma accademico di formazione alla ricerca (AFAM)"]))


######Women #######
n_it[9]<- length(as.numeric(dat_it$close_women[dat_it$gender == "Femmina"]))
n_it[10]<- length(as.numeric(dat_it$close_women[dat_it$gender == "Maschio"]))


######Men #######
n_it[11]<- length(as.numeric(dat_it$close_men[dat_it$gender == "Femmina"]))
n_it[12]<- length(as.numeric(dat_it$close_men[dat_it$gender == "Maschio"]))


########Urban ########

n_it[13]<- length(as.numeric(dat_it$close_urban[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]))
n_it[14]<- length(as.numeric(dat_it$close_urban[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]))

########Rural ########

n_it[15]<- length(as.numeric(dat_it$close_rural[dat_it$urban_rural  == "Una grande città (più di 10000 abitanti)"]))
n_it[16]<- length(as.numeric(dat_it$close_rural[dat_it$urban_rural  != "Una grande città (più di 10000 abitanti)"]))


#####Unemployed 

n_it[17]<- length(as.numeric(dat_it$close_unempl[!str_detect(dat_it$occupation_stat, "Disoccupato/a")]))
n_it[18]<- length(as.numeric(dat_it$close_unempl[str_detect(dat_it$occupation_stat, "Disoccupato/a")]))


y_it <- c("Old", "Young", "Old", "Young", "University degree", "No university degree",  "University degree", "No university degree", "Women", "Men", "Women", "Men", "Urban", "Rural", "Urban", "Rural", "Not unemployed", "Unemployed")

z_it <- c("Young", "Young", "Old", "Old", "University degree", "University degree", "No university degree", "No university degree", "Women", "Women", "Men", "Men", "Urban", "Urban", "Rural", "Rural", "Unemployed", "Unemployed")                

means_dat_it <- as.data.frame(cbind(x_it,y_it,z_it, s_it, n_it))

names(means_dat_it) <- c("means", "objective_group", "subjective_group", "sd_g", "n_g")



means_dat_it$subjective_group <- factor(means_dat_it$subjective_group, levels = unique(means_dat_it$subjective_group))


means_dat_it$upper <- as.numeric(means_dat_it$means)+qt(1 - (0.05 / 2), df=(as.numeric(means_dat_it$n_g)- 1))*(as.numeric(means_dat_it$sd_g)/sqrt(as.numeric(means_dat_it$n_g)))

means_dat_it$lower <- as.numeric(means_dat_it$means)-qt(1 - (0.05 / 2), df=(as.numeric(means_dat_it$n_g)- 1))*(as.numeric(means_dat_it$sd_g)/sqrt(as.numeric(means_dat_it$n_g)))


means_dat_it$in_out <- ifelse(means_dat_it$objective_group==means_dat_it$subjective_group, "In-group", "Out-group")


### Figure 1 in main paper (Italy)

means_dat_it %>% 
  ggplot(., aes(fill=in_out, x=subjective_group, y=as.numeric(means), shape=in_out))+
  geom_point(size=2)+
  geom_pointrange(aes(ymin=lower, ymax=upper, fill=in_out, shape=in_out))+
  scale_y_continuous()+
  ylim(0,10)+
  coord_flip()+
  theme_minimal()+
  theme(legend.position="bottom")+
  labs(x="", y="Subj. Group Identity", fill = "Objective group\n membership", shape = "Objective group\n membership")


ggsave(
  width = 16,
  height = 10,
  units = "cm", 
  dpi = 300,
  device="png",
  filename = "obj_subj_it.png", 
  path = path)



### Regional models as dats


#####Italy (north south)
mod_nat_1_it_reg <- lm(national ~  as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north , dat=dat_it)

mod_nat_2_it_reg <- lm(national ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north, dat=dat_it)

mod_nat_3_it_reg <- lm(national ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north, dat=dat_it)



mod_eu_1_it_reg <- lm(european ~  as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north , dat=dat_it)

mod_eu_2_it_reg <- lm(european ~ close_all+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north, dat=dat_it)

mod_eu_3_it_reg <- lm(european ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + south_north, dat=dat_it)


####Austria - Vianna - Everything else

mod_eu_1_reg <- lm(european ~ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote+ vienna, dat=dat)
mod_eu_2_reg <- lm(european ~ close_all + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote+ vienna, dat=dat)
mod_eu_3_reg <- lm(european ~ close_all+ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote+ vienna, dat=dat)

mod_nat_1_reg <- lm(national ~ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote + vienna, dat=dat)
mod_nat_2_reg <- lm(national ~ close_all + as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote+ vienna, dat=dat)
mod_nat_3_reg <- lm(national ~ as.numeric(close_young)+ as.numeric(close_old)+ as.numeric(close_uni) + as.numeric(close_no_uni) + as.numeric(close_rural) + as.numeric(close_urban)+ as.numeric(close_unempl) + as.numeric(close_women)+ as.numeric(close_men)+ as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel+ party_vote+ vienna, dat=dat)

###Appendix tables 7 and 8 

stargazer(mod_eu_1_reg, mod_eu_2_reg, mod_eu_3_reg, type="latex", style="apsr", out=paste0(path, "table_reg_eu.tex"))

stargazer(mod_nat_1_reg, mod_nat_2_reg, mod_nat_3_reg, type="latex", style="apsr", out=paste0(path, "table_reg_nat.tex"))


stargazer(mod_eu_1_it_reg, mod_eu_2_it_reg, mod_eu_3_it_reg, type="latex", style="apsr", out= paste0(path, "table_reg_eu_it.tex"))

stargazer(mod_nat_1_it_reg, mod_nat_2_it_reg, mod_nat_3_it_reg, type="latex", style="apsr", out=paste0(path, "table_reg_nat_it.tex"))


### Corruption scandal analysis 

dat$EndDate <- ymd(as_date(dat$EndDate))

dat$corruption_scandal <- ifelse(dat$EndDate>="2021-10-06", 1, 0)

summary(as.factor(dat$corruption_scandal))

###relationship between social identities, own preferences and sorting perceptions 

mod_corruption <- lm(national~as.factor(corruption_scandal)+as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel, data = dat)
mod_corruption_eu <- lm(european~as.factor(corruption_scandal)+as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel, data = dat)


dat$oevp <- ifelse(str_detect(dat$party_vote, "ÖVP"), 1, 0)

mod_vote_oevp<- glm(oevp ~ as.factor(corruption_scandal)+as.numeric(age) + gender + edu +urban_rural  +occupation_stat + migrat_back+ degree_travel, data = dat, family = binomial)

####Appendix table 9 and 10

stargazer(mod_vote_oevp, type="latex", style="apsr", out=paste0(path, "table_reg_oevp_vote.tex"))
stargazer(mod_corruption_eu, type="latex", style="apsr", out=paste0(path, "table_corruption_EU_id.tex"))






